home *** CD-ROM | disk | FTP | other *** search
- /*
- * File: fstranl.r
- * String analysis functions: any,bal,find,many,match,upto
- *
- * str_anal is a macro for performing the standard conversions and
- * defaulting for string analysis functions. It takes as arguments the
- * parameters for subject, beginning position, and ending position. It
- * produces declarations for these 3 names prepended with cnv_. These
- * variables will contain the converted versions of the arguments.
- *
- * Originated by kwalker 6/27/89. Enhanced/revised by cjeffery
- */
- #begdef str_anal(s, i, j)
- declare {
- C_integer cnv_ ## i;
- C_integer cnv_ ## j;
- }
-
- abstract {
- return integer
- }
-
- if is:null(s) then {
- inline {
- s = k_subject;
- }
- if is:null(i) then inline {
- cnv_ ## i = k_pos;
- }
- }
- else {
- if !cnv:string(s) then
- runerr(103,s)
- if is:null(i) then inline {
- cnv_ ## i = 1;
- }
- }
-
- if !is:null(i) then
- if cnv:C_integer(i,cnv_ ## i) then inline {
- if ((cnv_ ## i = cvpos(cnv_ ## i, StrLen(s))) == CvtFail)
- fail;
- }
- else
- runerr(101,i)
-
-
- if is:null(j) then inline {
- cnv_ ## j = StrLen(s) + 1;
- }
- else if cnv:C_integer(j,cnv_ ## j) then inline {
- if ((cnv_ ## j = cvpos(cnv_ ## j, StrLen(s))) == CvtFail)
- fail;
- if (cnv_ ## i > cnv_ ## j) {
- register C_integer tmp;
- tmp = cnv_ ## i;
- cnv_ ## i = cnv_ ## j;
- cnv_ ## j = tmp;
- }
- }
- else
- runerr(101,j)
-
- #enddef
-
-
- "any(c,s,i1,i2) - produces i1+1 if i2 is greater than 1 and s[i] is contained "
- "in c and poseq(i2,x) is greater than poseq(i1,x), but fails otherwise."
-
- function{0,1} any(c,s,i,j)
- str_anal( s, i, j )
- if !cnv:tmp_cset(c) then
- runerr(104,c)
- body {
- if (cnv_i == cnv_j)
- fail;
- if (!Testb(StrLoc(s)[cnv_i-1], c))
- fail;
- return C_integer cnv_i+1;
- }
- end
-
-
- "bal(c1,c2,c3,s,i1,i2) - generates the sequence of integer positions in s up to"
- " a character of c1 in s[i1:i2] that is balanced with respect to characters in"
- " c2 and c3, but fails if there is no such position."
-
- function{*} bal(c1,c2,c3,s,i,j)
- str_anal( s, i, j )
- if !def:tmp_cset(c1,fullcs) then
- runerr(104,c1)
- if !def:tmp_cset(c2,lparcs) then
- runerr(104,c2)
- if !def:tmp_cset(c3,rparcs) then
- runerr(104,c3)
-
- body {
- C_integer cnt;
- char c;
-
- /*
- * Loop through characters in s[i:j]. When a character in c2
- * is found, increment cnt; when a character in c3 is found, decrement
- * cnt. When cnt is 0 there have been an equal number of occurrences
- * of characters in c2 and c3, i.e., the string to the left of
- * i is balanced. If the string is balanced and the current character
- * (s[i]) is in c, suspend with i. Note that if cnt drops below
- * zero, bal fails.
- */
- cnt = 0;
- while (cnv_i < cnv_j) {
- c = ToAscii(StrLoc(s)[cnv_i-1]);
- if (cnt == 0 && Testb(c, c1)) {
- suspend C_integer cnv_i;
- }
- if (Testb(c, c2))
- cnt++;
- else if (Testb(c, c3))
- cnt--;
- if (cnt < 0)
- fail;
- cnv_i++;
- }
- /*
- * Eventually fail.
- */
- fail;
- }
- end
-
-
- "find(s1,s2,i1,i2) - generates the sequence of positions in s2 at which "
- "s1 occurs as a substring in s2[i1:i2], but fails if there is no such position."
-
- function{*} find(s1,s2,i,j)
- str_anal( s2, i, j )
- if !cnv:string(s1) then
- runerr(103,s1)
-
- body {
- register char *str1, *str2;
- C_integer s1_len, l, term;
-
- /*
- * Loop through s2[i:j] trying to find s1 at each point, stopping
- * when the remaining portion s2[i:j] is too short to contain s1.
- * Optimize me!
- */
- s1_len = StrLen(s1);
- term = cnv_j - s1_len;
- while (cnv_i <= term) {
- str1 = StrLoc(s1);
- str2 = StrLoc(s2) + cnv_i - 1;
- l = s1_len;
-
- /*
- * Compare strings on a byte-wise basis; if the end is reached
- * before inequality is found, suspend with the position of the
- * string.
- */
- do {
- if (l-- <= 0) {
- suspend C_integer cnv_i;
- break;
- }
- } while (*str1++ == *str2++);
- cnv_i++;
- }
- fail;
- }
- end
-
-
- "many(c,s,i1,i2) - produces the position in s after the longest initial "
- "sequence of characters in c in s[i1:i2] but fails if there is none."
-
- function{0,1} many(c,s,i,j)
- str_anal( s, i, j )
- if !cnv:tmp_cset(c) then
- runerr(104,c)
- body {
- /*
- * Fail if first character of Arg2[i:j] is not in Arg1.
- */
- if (!Testb(ToAscii(StrLoc(s)[cnv_i-1]), c)) {
- fail;
- }
- /*
- * Move i along Arg2[i:j] until a character that is not in Arg1 is found
- * or the end of the string is reached.
- */
- cnv_i++;
- while (cnv_i < cnv_j) {
- if (!Testb(ToAscii(StrLoc(s)[cnv_i-1]), c))
- break;
- cnv_i++;
- }
- /*
- * Return the position of the first character not in c.
- */
- return C_integer cnv_i;
- }
- end
-
-
- "match(s1,s2,i1,i2) - produces i1+*s1 if s1==s2[i1+:*s1], but fails otherwise."
-
- function{0,1} match(s1,s2,i,j)
- str_anal( s2, i, j )
- if !cnv:tmp_string(s1) then
- runerr(103,s1)
- body {
- char *str1, *str2;
-
- /*
- * Cannot match unless s2[i:j] is as long as s1.
- */
- if (cnv_j - cnv_i < StrLen(s1))
- fail;
-
- /*
- * Compare s1 with s2[i:j] for *s1 characters; fail if an
- * inequality is found.
- */
- str1 = StrLoc(s1);
- str2 = StrLoc(s2) + cnv_i - 1;
- for (cnv_j = StrLen(s1); cnv_j > 0; cnv_j--)
- if (*str1++ != *str2++)
- fail;
-
- /*
- * Return position of end of matched string in s2.
- */
- return C_integer cnv_i + StrLen(s1);
- }
- end
-
-
- "upto(c,s,i1,i2) - generates the sequence of integer positions in s up to a "
- "character in c in s[i2:i2], but fails if there is no such position."
-
- function{*} upto(c,s,i,j)
- str_anal( s, i, j )
- if !cnv:tmp_cset(c) then
- runerr(104,c)
- body {
- C_integer tmp;
-
- /*
- * Look through s[i:j] and suspend position of each occurrence of
- * of a character in c.
- */
- while (cnv_i < cnv_j) {
- tmp = (C_integer)ToAscii(StrLoc(s)[cnv_i-1]);
- if (Testb(tmp, c)) {
- suspend C_integer cnv_i;
- }
- cnv_i++;
- }
- /*
- * Eventually fail.
- */
- fail;
- }
- end
-